import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
import helper
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset
import glob
import librosa
import math
import IPython.display as ipd
# Select the compute device: prefer CUDA when available, fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
#### Part 1
# Upload the training wav files into the Colab runtime.
from google.colab import files
uploaded = files.upload()
# Load clean and noisy training speech at their native sample rate and take
# STFTs (1024-point FFT, hop 512 -> 513 frequency bins per frame).
clean, srate=librosa.load('train_clean_male.wav', sr=None)
S_st=librosa.stft(clean, n_fft=1024, hop_length=512)
noisy, srate=librosa.load('train_dirty_male.wav', sr=None)
X_st=librosa.stft(noisy, n_fft=1024, hop_length=512)
S_st.shape  # notebook-style inspection; no effect when run as a script
S_abs=np.abs(S_st)
X_abs=np.abs(X_st)
# Magnitude spectrograms transposed to (frames, 513) and moved to the GPU;
# S is the clean target, X is the noisy input.
S= torch.tensor(np.transpose(S_abs)).cuda()
X = torch.tensor(np.transpose(X_abs)).cuda()
class CNN_problem1(nn.Module):
    """1-D CNN denoiser: maps one 513-bin noisy magnitude frame to a clean one.

    Feature-length bookkeeping: 513 -conv5-> 509 -pool2-> 254 -conv5-> 250
    -pool2-> 125, with 10 channels, hence the 125*10 flattened input to fc1.
    """

    def __init__(self):
        super(CNN_problem1, self).__init__()
        # Each layer is Xavier-initialised right after construction.
        self.first_conv = nn.Conv1d(1, 10, kernel_size=5, stride=1)
        nn.init.xavier_normal_(self.first_conv.weight)
        self.relu = nn.ReLU()
        self.pooling = nn.AvgPool1d(kernel_size=2, stride=2)
        self.second_conv = nn.Conv1d(10, 10, kernel_size=5, stride=1)
        nn.init.xavier_normal_(self.second_conv.weight)
        self.fc1 = nn.Linear(125 * 10, 900)
        nn.init.xavier_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(900, 513)
        nn.init.xavier_normal_(self.fc2.weight)

    def Forward_prop(self, x):
        """x: (batch, 513) magnitude frames -> (batch, 513) denoised frames."""
        x = x.unsqueeze(1)  # add the single input channel: (batch, 1, 513)
        hidden = self.pooling(self.relu(self.first_conv(x)))
        hidden = self.pooling(self.relu(self.second_conv(hidden)))
        hidden = hidden.reshape(hidden.shape[0], -1)  # flatten to (batch, 1250)
        hidden = self.relu(self.fc1(hidden))
        # Final ReLU keeps the output non-negative, like a magnitude spectrum.
        return self.relu(self.fc2(hidden))
# Model, loss, and optimizer for Part 1.
cnn_model1=CNN_problem1().cuda()
loss=nn.MSELoss()
param=cnn_model1.parameters()
optimize=torch.optim.Adam(param,lr=0.001)
batch_num=500  # mini-batch size (spectrogram frames per optimizer step)
def train_model():
    """Run one epoch over (X, S) in mini-batches and return the last batch's loss.

    Relies on module-level globals: X (noisy frames), S (clean targets),
    cnn_model1, loss, optimize, and batch_num.

    Returns:
        float: MSE loss of the final mini-batch of the epoch (not an average).
    """
    for i in range(0, len(X), batch_num):
        X_batch = X[i:i + batch_num]
        S_batch = S[i:i + batch_num]
        ff_out = cnn_model1.Forward_prop(X_batch.cuda())
        optimize.zero_grad()
        losses = loss(ff_out, S_batch.cuda())
        losses.backward()
        optimize.step()
    # Fix: `.item()` instead of the deprecated `.data` — returns a detached
    # Python float rather than a CUDA tensor, which the matplotlib loss plot
    # downstream cannot handle.
    return losses.item()
# Train for 600 epochs, recording each epoch's (last-batch) loss, then plot
# the loss curve.
training_loss = []
for epoch in range(600):
    epoch_loss = train_model()
    training_loss.append(epoch_loss)
    print("epoch {} is done".format(epoch))
plt.plot(list(range(600)), training_loss)
# Load the two test signals. X_t* will be turned into magnitude inputs below;
# xt* keeps the untouched complex STFT so its phase can be reused at
# reconstruction time.
s, sr=librosa.load('test_x_01.wav', sr=None)
X_t1=librosa.stft(s, n_fft=1024, hop_length=512)
xt1=librosa.stft(s, n_fft=1024, hop_length=512)
sa, sr=librosa.load('test_x_02.wav', sr=None)
X_t2=librosa.stft(sa, n_fft=1024, hop_length=512)
xt2=librosa.stft(sa, n_fft=1024, hop_length=512)
def test_model(x):
    """Inference-only forward pass through cnn_model1 (no gradient tracking)."""
    with torch.no_grad():
        return cnn_model1.Forward_prop(x)
def reconstruct(x, o_inp):
    """Rebuild a complex spectrogram from the model's magnitude estimate.

    x: noisy magnitude spectrogram, (frames, 513) CUDA tensor.
    o_inp: original complex STFT of the same signal, (513, frames) numpy array.
    Returns the estimated complex spectrogram, (frames, 513) numpy array.
    """
    predicted_mag = test_model(x).cpu().numpy()
    noisy_mag = x.cpu().numpy()
    # o_inp.T / noisy_mag isolates the noisy phase; scaling it by the
    # predicted magnitude keeps that phase for the denoised signal.
    return (o_inp.T / noisy_mag) * predicted_mag
# Convert test magnitudes to (frames, 513) CUDA tensors.
X_t1=np.abs(X_t1)
X_t1= torch.tensor(np.transpose(X_t1)).cuda()
X_t2=np.abs(X_t2)
X_t2= torch.tensor(np.transpose(X_t2)).cuda()
# Denoise both test signals and invert to waveforms using the noisy phase.
s1=reconstruct(X_t1,xt1)
s2=reconstruct(X_t2,xt2)
s1_re=librosa.istft(s1.T,hop_length=512,length =len(s))
s2_re=librosa.istft(s2.T,hop_length=512,length =len(sa))
# NOTE(review): librosa.output.write_wav was removed in librosa 0.8 — newer
# environments need soundfile.write instead; confirm the pinned librosa version.
librosa.output.write_wav('test_s_01_recons.wav', s1_re, sr)
librosa.output.write_wav('test_s_02_recons.wav', s2_re, sr)
# Also denoise/reconstruct the training signal for the SNR check below.
train_n=reconstruct(X,X_st)
train_re=librosa.istft(train_n.T,hop_length=512,length =len(noisy))
librosa.output.write_wav('train_recons.wav', train_re, srate)
ipd.Audio('train_recons.wav') # reconstructed train signal
ipd.Audio('test_s_01_recons.wav') # reconstructed test 1 signal
ipd.Audio('test_s_02_recons.wav') # The reconstructed test 2 signal
def SNR(cs, ds):
    """Signal-to-noise ratio in dB between two wav files.

    cs: path to the reference wav; ds: path to the processed/degraded wav.
    Computes 10*log10(sum(cs^2) / sum((cs - ds)^2)).
    """
    cs, sr = librosa.load(cs, sr=None)
    ds, sr = librosa.load(ds, sr=None)
    signal_power = np.dot(cs, cs.T)
    noise_power = np.dot(cs - ds, (cs - ds).T)
    ratio = signal_power / noise_power
    return 10 * np.log10(ratio)
# NOTE(review): this measures SNR of the reconstruction against the *dirty*
# input rather than the clean reference — confirm that is intended.
SNR('train_dirty_male.wav', 'train_recons.wav')
# Some formulae used: (the formula images/markdown from the notebook were not captured in this export)
#### Part 2: same training data, but the model will see 20-frame context windows.
from google.colab import files
uploaded = files.upload()
# Reload clean/noisy training speech and their STFTs (513 bins per frame).
clean, srate=librosa.load('train_clean_male.wav', sr=None)
S_st=librosa.stft(clean, n_fft=1024, hop_length=512)
noisy, srate=librosa.load('train_dirty_male.wav', sr=None)
X_st=librosa.stft(noisy, n_fft=1024, hop_length=512)
S_abs=np.abs(S_st)
X_abs=np.abs(X_st)
# S= torch.tensor(np.transpose(S_abs)).cuda()
# X = torch.tensor(np.transpose(X_abs)).cuda()
def frames_creation(inp, frame_len=20):
    """Slice a (time, freq) spectrogram into overlapping fixed-length windows.

    Args:
        inp: 2-D array of shape (T, F), time frames by frequency bins.
        frame_len: number of consecutive frames per window (default 20,
            matching the original hard-coded value — generalized to a
            parameter; callers without the argument behave identically).

    Returns:
        np.ndarray of shape (T - frame_len + 1, frame_len, F), where window i
        covers rows i .. i+frame_len-1 (stride 1, maximal overlap).
    """
    n_windows = inp.shape[0] - frame_len + 1
    return np.array([inp[i:i + frame_len, :] for i in range(n_windows)])
# Window both spectrograms into (num_windows, 20, 513) CUDA tensors.
S_frame=torch.tensor(frames_creation(np.transpose(S_abs))).cuda()
X_frame=torch.tensor(frames_creation(np.transpose(X_abs))).cuda()
S_frame.shape  # notebook-style inspection only
X_frame.shape  # notebook-style inspection only
class CNN_problem2(nn.Module):
    """2-D CNN denoiser over 20-frame context windows of the spectrogram.

    Input windows are (batch, 20, 513); after two conv(5)+avgpool(2) stages
    the feature map is (20 channels, 2, 125), hence the 125*20*2 flattened
    input to fc1. The output is a single denoised 513-bin frame per window.
    """

    def __init__(self):
        super(CNN_problem2, self).__init__()
        # Each layer is Xavier-initialised right after construction.
        self.first_conv = nn.Conv2d(1, 10, kernel_size=5, stride=1)
        nn.init.xavier_normal_(self.first_conv.weight)
        self.relu = nn.ReLU()
        self.pooling = nn.AvgPool2d(kernel_size=2, stride=2)
        self.second_conv = nn.Conv2d(10, 20, kernel_size=5, stride=1)
        nn.init.xavier_normal_(self.second_conv.weight)
        self.fc1 = nn.Linear(125 * 20 * 2, 900)
        nn.init.xavier_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(900, 513)
        nn.init.xavier_normal_(self.fc2.weight)

    def Forward_prop(self, x):
        """x: (batch, 20, 513) windows -> (batch, 513) denoised frames."""
        x = x.unsqueeze(1)  # add the single input channel: (batch, 1, 20, 513)
        hidden = self.pooling(self.relu(self.first_conv(x)))
        hidden = self.pooling(self.relu(self.second_conv(hidden)))
        hidden = hidden.reshape(hidden.shape[0], -1)  # flatten to (batch, 5000)
        hidden = self.relu(self.fc1(hidden))
        # Final ReLU keeps the output non-negative, like a magnitude spectrum.
        return self.relu(self.fc2(hidden))
# Model, loss, and optimizer for Part 2.
cnn_model2=CNN_problem2().cuda()
loss=nn.MSELoss()
param=cnn_model2.parameters()
optimize=torch.optim.Adam(param,lr=0.001)
batch_num=500  # mini-batch size (20-frame windows per optimizer step)
def train_model():
    """Run one epoch over the 20-frame windows; return the last batch's loss.

    The target for each window is its final (index 19) clean frame, so the
    network learns to denoise the newest frame given 20 frames of context.
    Relies on module-level globals: X_frame, S_frame, cnn_model2, loss,
    optimize, and batch_num.

    Returns:
        float: MSE loss of the final mini-batch of the epoch (not an average).
    """
    for i in range(0, len(X_frame), batch_num):
        X_batch = X_frame[i:i + batch_num, :, :]
        S_batch = S_frame[i:i + batch_num, :, :]
        ff_out = cnn_model2.Forward_prop(X_batch.cuda())
        optimize.zero_grad()
        losses = loss(ff_out, S_batch[:, 19, :].cuda())
        losses.backward()
        optimize.step()
    # Fix: `.item()` instead of the deprecated `.data` — returns a detached
    # Python float rather than a CUDA tensor, which the matplotlib loss plot
    # downstream cannot handle.
    return losses.item()
# Train for 600 epochs, recording each epoch's (last-batch) loss, then plot
# the loss curve.
training_loss = []
for epoch in range(600):
    epoch_loss = train_model()
    training_loss.append(epoch_loss)
    print("epoch {} is done".format(epoch))
plt.plot(list(range(600)), training_loss)
# Load the two test signals again; X_t*/xt* as in Part 1 (xt* retains the
# complex STFT for phase). NOTE: `rec` below reloads these from disk itself.
s, sr=librosa.load('test_x_01.wav', sr=None)
X_t1=librosa.stft(s, n_fft=1024, hop_length=512)
xt1=librosa.stft(s, n_fft=1024, hop_length=512)
sa, sr=librosa.load('test_x_02.wav', sr=None)
X_t2=librosa.stft(sa, n_fft=1024, hop_length=512)
xt2=librosa.stft(sa, n_fft=1024, hop_length=512)
def test_model(x):
    """Inference-only forward pass through cnn_model2 (no gradient tracking)."""
    with torch.no_grad():
        return cnn_model2.Forward_prop(x)
#reconstruction
def rec(dirty):
    """Denoise a wav file with cnn_model2 and reconstruct the time signal.

    Loads `dirty`, windows its magnitude STFT into 20-frame contexts, predicts
    a clean magnitude for each window's last frame, reapplies the noisy phase,
    and inverts back to a waveform of the original length.

    Relies on module-level globals: cnn_model2 (via test_model),
    frames_creation, and test_model.

    Args:
        dirty: path to the noisy wav file.

    Returns:
        np.ndarray: the reconstructed time-domain signal.
    """
    sig, srate = librosa.load(dirty, sr=None)
    di = librosa.stft(sig, n_fft=1024, hop_length=512)
    di_abs = np.abs(di)
    di_t = torch.tensor(frames_creation(np.transpose(di_abs))).cuda()
    di_out = test_model(di_t)
    # The model only predicts frames 19..T-1 (each needs a full 20-frame
    # context), so zero-fill the first 19 output frames.
    si_f = np.zeros((19, 513))
    recons_in = np.concatenate((si_f, di_out.cpu().numpy()))
    # Unit-phase term of the noisy STFT. Fix: floor the magnitude so
    # zero-magnitude bins no longer produce NaN/inf in the division.
    mag = np.maximum(np.abs(di.T), np.finfo(di_abs.dtype).tiny)
    recons = (di.T / mag) * recons_in
    recr_sig = librosa.istft(recons.T, hop_length=512, length=len(sig))
    return recr_sig
# Reconstruct the denoised train/test signals and write them to disk.
train=rec('train_dirty_male.wav')
# NOTE(review): librosa.output.write_wav was removed in librosa 0.8 — newer
# environments need soundfile.write instead; confirm the pinned librosa version.
librosa.output.write_wav('train_recons.wav', train, srate)
ipd.Audio('train_recons.wav')
test1=rec('test_x_01.wav')
librosa.output.write_wav('test1_recons.wav', test1, sr)
ipd.Audio('test1_recons.wav')
test2=rec('test_x_02.wav')
librosa.output.write_wav('test2_recons.wav', test2, sr)
ipd.Audio('test2_recons.wav')
def SNR(cs, ds):
    """Signal-to-noise ratio in dB between two wav files.

    cs: path to the reference wav; ds: path to the processed/degraded wav.
    Computes 10*log10(sum(cs^2) / sum((cs - ds)^2)).
    """
    cs, sr = librosa.load(cs, sr=None)
    ds, sr = librosa.load(ds, sr=None)
    A = np.dot(cs, cs.T)
    B = np.dot(cs - ds, (cs - ds).T)
    s = A / B
    # Fix: use np.log10 for consistency with the Part-1 SNR definition in
    # this file (math.log10 only accepts plain scalars).
    snr = 10 * (np.log10(s))
    return snr
# NOTE(review): this measures SNR of the reconstruction against the *dirty*
# input rather than the clean reference — confirm that is intended.
SNR('train_dirty_male.wav', 'train_recons.wav')
# Some formulae used: (the formula images/markdown from the notebook were not captured in this export)